In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
In [5]:
base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
confirmed_df = pd.read_csv(base_url + 'time_series_covid19_confirmed_global.csv')
In [6]:
confirmed_df
Out[6]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 2/21/22 2/22/22 2/23/22 2/24/22 2/25/22 2/26/22 2/27/22 2/28/22 3/1/22 3/2/22
0 NaN Afghanistan 33.939110 67.709953 0 0 0 0 0 0 ... 172441 172716 172901 173047 173084 173146 173395 173659 173879 174073
1 NaN Albania 41.153300 20.168300 0 0 0 0 0 0 ... 270370 270455 270734 270947 271141 271141 271527 271563 271702 271825
2 NaN Algeria 28.033900 1.659600 0 0 0 0 0 0 ... 264201 264365 264488 264603 264706 264778 264855 264936 265010 265079
3 NaN Andorra 42.506300 1.521800 0 0 0 0 0 0 ... 37589 37820 37901 37958 37999 37999 37999 37999 38165 38249
4 NaN Angola -11.202700 17.873900 0 0 0 0 0 0 ... 98658 98671 98698 98701 98701 98701 98701 98741 98746 98746
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
279 NaN West Bank and Gaza 31.952200 35.233200 0 0 0 0 0 0 ... 633684 636055 638172 640214 640214 640214 640214 645947 647203 648039
280 NaN Winter Olympics 2022 39.904200 116.407400 0 0 0 0 0 0 ... 509 509 510 510 514 514 518 521 524 524
281 NaN Yemen 15.552727 48.516388 0 0 0 0 0 0 ... 11741 11746 11751 11751 11759 11760 11769 11771 11771 11771
282 NaN Zambia -13.133897 27.849332 0 0 0 0 0 0 ... 311264 311592 311888 312118 312374 312611 312707 312750 312970 313203
283 NaN Zimbabwe -19.015438 29.154857 0 0 0 0 0 0 ... 233571 233980 234589 234967 235467 235803 235803 236380 236871 237503

284 rows × 775 columns

In [7]:
#removing Province/State, Lat, Long column
confirmed_df = confirmed_df.drop(columns = ['Lat','Long', 'Province/State'])
In [8]:
#checking data frame
confirmed_df.head()
Out[8]:
Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 ... 2/21/22 2/22/22 2/23/22 2/24/22 2/25/22 2/26/22 2/27/22 2/28/22 3/1/22 3/2/22
0 Afghanistan 0 0 0 0 0 0 0 0 0 ... 172441 172716 172901 173047 173084 173146 173395 173659 173879 174073
1 Albania 0 0 0 0 0 0 0 0 0 ... 270370 270455 270734 270947 271141 271141 271527 271563 271702 271825
2 Algeria 0 0 0 0 0 0 0 0 0 ... 264201 264365 264488 264603 264706 264778 264855 264936 265010 265079
3 Andorra 0 0 0 0 0 0 0 0 0 ... 37589 37820 37901 37958 37999 37999 37999 37999 38165 38249
4 Angola 0 0 0 0 0 0 0 0 0 ... 98658 98671 98698 98701 98701 98701 98701 98741 98746 98746

5 rows × 772 columns

In [11]:
#merging rows for same country/regions
confirmed_df = confirmed_df.groupby(by='Country/Region').aggregate(np.sum).T
In [12]:
#checking data frame
confirmed_df
Out[12]:
Country/Region Afghanistan Albania Algeria Andorra Angola Antarctica Antigua and Barbuda Argentina Armenia Australia ... Uruguay Uzbekistan Vanuatu Venezuela Vietnam West Bank and Gaza Winter Olympics 2022 Yemen Zambia Zimbabwe
1/22/20 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1/23/20 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 2 0 0 0 0 0
1/24/20 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 2 0 0 0 0 0
1/25/20 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 2 0 0 0 0 0
1/26/20 0 0 0 0 0 0 0 0 0 4 ... 0 0 0 0 2 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2/26/22 173146 271141 264778 37999 98701 11 7437 8893568 419423 3188117 ... 838341 236174 19 514315 3219177 640214 514 11760 312611 235803
2/27/22 173395 271527 264855 37999 98701 11 7437 8897178 419693 3209599 ... 840511 236299 19 514315 3321005 640214 518 11769 312707 235803
2/28/22 173659 271563 264936 37999 98741 11 7437 8900656 419832 3235017 ... 842462 236405 19 515124 3443485 645947 521 11771 312750 236380
3/1/22 173879 271702 265010 38165 98746 11 7447 8904176 420156 3256772 ... 844400 236501 19 515124 3557629 647203 524 11771 312970 236871
3/2/22 174073 271825 265079 38249 98746 11 7449 8912317 420498 3296784 ... 846868 236596 19 515582 3709481 648039 524 11771 313203 237503

771 rows × 198 columns

In [13]:
#changing column name (Country/Region to Date)
confirmed_df.index.name = 'Date'
#reset index
confirmed_df = confirmed_df.reset_index()
In [15]:
confirmed_df.tail()
Out[15]:
Country/Region Date Afghanistan Albania Algeria Andorra Angola Antarctica Antigua and Barbuda Argentina Armenia ... Uruguay Uzbekistan Vanuatu Venezuela Vietnam West Bank and Gaza Winter Olympics 2022 Yemen Zambia Zimbabwe
766 2/26/22 173146 271141 264778 37999 98701 11 7437 8893568 419423 ... 838341 236174 19 514315 3219177 640214 514 11760 312611 235803
767 2/27/22 173395 271527 264855 37999 98701 11 7437 8897178 419693 ... 840511 236299 19 514315 3321005 640214 518 11769 312707 235803
768 2/28/22 173659 271563 264936 37999 98741 11 7437 8900656 419832 ... 842462 236405 19 515124 3443485 645947 521 11771 312750 236380
769 3/1/22 173879 271702 265010 38165 98746 11 7447 8904176 420156 ... 844400 236501 19 515124 3557629 647203 524 11771 312970 236871
770 3/2/22 174073 271825 265079 38249 98746 11 7449 8912317 420498 ... 846868 236596 19 515582 3709481 648039 524 11771 313203 237503

5 rows × 199 columns

In [16]:
#melt dataframe
confirmed_melt_df = confirmed_df.melt(id_vars='Date').copy()
In [17]:
#checking melting
confirmed_melt_df
Out[17]:
Date Country/Region value
0 1/22/20 Afghanistan 0
1 1/23/20 Afghanistan 0
2 1/24/20 Afghanistan 0
3 1/25/20 Afghanistan 0
4 1/26/20 Afghanistan 0
... ... ... ...
152653 2/26/22 Zimbabwe 235803
152654 2/27/22 Zimbabwe 235803
152655 2/28/22 Zimbabwe 236380
152656 3/1/22 Zimbabwe 236871
152657 3/2/22 Zimbabwe 237503

152658 rows × 3 columns

In [18]:
#rename column
confirmed_melt_df.rename(columns={'value':'Confirmed'},inplace=True)
In [19]:
confirmed_melt_df.head()
Out[19]:
Date Country/Region Confirmed
0 1/22/20 Afghanistan 0
1 1/23/20 Afghanistan 0
2 1/24/20 Afghanistan 0
3 1/25/20 Afghanistan 0
4 1/26/20 Afghanistan 0
In [20]:
#converting Date(strings) to actual day/time format
max_date = confirmed_melt_df['Date'].max()
max_date
Out[20]:
'9/9/21'
In [21]:
confirmed_melt_df['Date'] = pd.to_datetime(confirmed_melt_df['Date'])
In [22]:
confirmed_melt_df.head()
Out[22]:
Date Country/Region Confirmed
0 2020-01-22 Afghanistan 0
1 2020-01-23 Afghanistan 0
2 2020-01-24 Afghanistan 0
3 2020-01-25 Afghanistan 0
4 2020-01-26 Afghanistan 0
In [23]:
confirmed_melt_df['Date'] = confirmed_melt_df['Date'].dt.strftime('%m/%d/%y')
In [24]:
max_date  = confirmed_melt_df['Date'].max()
In [25]:
max_date
Out[25]:
'12/31/21'
In [26]:
#vizualization of data
In [28]:
#maximum date confirmed cases number
total_confirmed_df = confirmed_melt_df[confirmed_melt_df['Date']==max_date]
total_confirmed_df
Out[28]:
Date Country/Region Confirmed
709 12/31/21 Afghanistan 158084
1480 12/31/21 Albania 210224
2251 12/31/21 Algeria 218432
3022 12/31/21 Andorra 23740
3793 12/31/21 Angola 81593
... ... ... ...
149512 12/31/21 West Bank and Gaza 469748
150283 12/31/21 Winter Olympics 2022 0
151054 12/31/21 Yemen 10126
151825 12/31/21 Zambia 254274
152596 12/31/21 Zimbabwe 213258

198 rows × 3 columns

In [29]:
# sum of total confirmed cases
total_confirmed = total_confirmed_df['Confirmed'].sum()
total_confirmed
Out[29]:
288666476
In [30]:
#total cases per country 
fig = px.bar(total_confirmed_df, x='Country/Region', y='Confirmed')
fig.show()
In [32]:
# showing top 30 confirmed countries
fig = px.bar(total_confirmed_df.sort_values('Confirmed',ascending=False).head(30), x='Country/Region', y='Confirmed')
fig.show()
In [34]:
#historical trend of countries
figTwo = px.scatter(confirmed_melt_df,x='Date', y='Confirmed',color='Country/Region')
figTwo.show()
In [35]:
#specific country affection rate
figThree = px.line(confirmed_melt_df[confirmed_melt_df['Country/Region']=='Bangladesh'],x='Date', y='Confirmed')
figThree.show()
In [ ]: